Single cell gene correlation with neuropathology markers

C9

In excitatory neurons, correlations between cell-type markers and significantly deregulated genes in FTLD-C9 highlighted a consistent association with STMN2 expression, a gene central to axonal maintenance.

Across several RORB-characterized excitatory subtypes, including RORB/FOXO1, RORB/LRRK1 and RORB/ADGRL4, and PCP4/NXPH2, strong negative correlations with STMN2 were observed. Among these, the RORB/FOXO1 gene C9orf72 correlation with STMN2 (ρ = −0.99, p = 0,016), the RORB/LRRK1 genes STMN2 (ρ = −0.85, p = 0,0016), NPTX2 (ρ = −0.73, p = 0,013), C9orf72 (ρ = -0,85, p = 0,0016), SPP1 (ρ = −0.8, p = 0,005), the RORB/ADGRL4 C9orf72(ρ = -0,66, p = 0,0308), STMN2(ρ = -0,74, p = 0,011) and PCP4/NXPH2 STMN2 (ρ =-0.84, p = 0.001).

THEMIS- characterized cells also shown genes with STMN2 marker correlation. THEMIS/NR4A2 STMN2 (ρ =-0.65, p = 0.029), THEMIS/TMEM233 C9orf72(ρ =-0.66, p = 0.025), NPTX2(ρ =-0.63, p = 0.034) and STMN2(ρ =-0.75, p = 0.01), TLE4/MEGF11 STMN2(ρ =-0.65, p = 0.028) and TLE4/SEMA3D C9orf72(ρ =0.61, p = 0.046) and CX3CR1(ρ =0.82, p = 0.0016) .

These findings indicate that, although excitatory neuron proportions defined by RORB expression do not differ between FTLD-C9 and controls, their association with STMN2 expression reflects a dynamic remodeling of excitatory circuits along disease progression, suggesting that excitatory neuronal dysfunction is intimately linked to STMN2-associated pathology.

Only THEMIS/TMEM233 STMN2 expression shown correlation with pTDP43 marker (ρ = 0.809, p = 0.021). In non-neuronal populations, significant correlations with STMN2 expression were detected in oligodendrocytes. In it both C9orf72 (ρ = −0.64, p = 0.03) and STMN2 (ρ = −0.63, p = 0.04) were negatively correlated with STMN2, further reinforcing the presence of interactions in STMN2-dependent processes.

Code
library(readxl)
library(dplyr)
library(DT)

# ============================================================
# LOAD DATA
# ============================================================

file_path <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/RESULTS_ORDERED/6. GENES OF INTERST CORRELATION/C9_genes_correlation_allCovs.csv"

filtered <- read.csv(file_path)

# ============================================================
# CLEAN COLUMN NAMES IF NEEDED
# Some sheets have "NA." or "rho" in first column.
# Keep only meaningful columns: Cell, Gene, Spearman, p_value.
# ============================================================

# Try to detect the first unnamed column (if present)
first_col <- colnames(filtered)[1]
if (first_col %in% c("NA.", "rho", "rho1", "", NA)) {
  filtered <- filtered %>% select(-1)
}

# ============================================================
# FILTERABLE HTML TABLE FOR QUARTO
# ============================================================

datatable(
  filtered,
  filter = "top",
  options = list(
    scrollX = TRUE,
    pageLength = 25,
    dom = "frtip"   # no download buttons
  )
)

pTDP43

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "pTDP43"   # <<< FORCE only pTDP43

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)


# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

ACSL3

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "ACSL3"   # <<< FORCE only pTDP43

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)


# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

lncRNA

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "lncRNA"   # <<< FORCE only pTDP43

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)


# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

STMN2

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "STMN2"  

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

fociSENSE

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "fociSENSE"   # <<< FORCE only pTDP43

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

fociANTI

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "fociANTI"   

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

PolyGP

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "polyGP"  

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

PolyGA

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "polyGA"  

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "pTDP43"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "pTDP43",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "pTDP43")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "pTDP43"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "pTDP43 Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "pTDP43"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

PolyGA

Code
# ============================================================
# INTERACTIVE SCATTERPLOTS – ONLY pTDP43, SELECT CELL + GENE
# ============================================================

library(dplyr)
library(tidyverse)
library(readxl)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

# ============================================================
# ---- LOAD DATA ------------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/C9/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_C9_neuropath_SOM.csv")
OG_Covariables$X <- gsub("^X", "", OG_Covariables$X)

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")
covariables <- "polyGP"  

# ============================================================
# ---- COMPUTE CORRELATIONS ----------------------------------
# ============================================================

results <- data.frame(Cell = character(),
                      Gene = character(),
                      Covariable = character(),
                      Spearman = numeric(),
                      p_value = numeric(),
                      stringsAsFactors = FALSE)

for (cell in cells) {
  file_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(file_path)) next
  
  data <- read.csv(file_path, row.names = "X")
  rownames(data) <- gsub("^X", "", rownames(data))
  
  for (gene in genes) {
    if (!(gene %in% colnames(data))) next
    
    common_ids <- intersect(rownames(data), OG_Covariables$X)
    subset_gene <- data[common_ids, gene, drop = FALSE]
    subset_cov  <- OG_Covariables[match(common_ids, OG_Covariables$X), ]
    
    result_corr <- safe_cor(subset_gene[[1]], subset_cov[, "polyGP"])
    
    results <- rbind(results,
                     data.frame(Cell = cell,
                                Gene = gene,
                                Covariable = "polyGP",
                                Spearman = result_corr$estimate,
                                p_value = result_corr$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- BUILD SCATTER DATA ------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(trace_id = integer(), Cell = character(), Gene = character())
trace_index <- 0

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names = 1)
  rownames(expr_df) <- gsub("^X", "", rownames(expr_df))
  
  cov_df <- OG_Covariables[, c("X", "polyGP")]
  cov_df$X <- gsub("^X", "", cov_df$X)
  cov_df <- cov_df[!is.na(cov_df$pTDP43), ]
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), "polyGP"],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x) == 0 || sd(df$y) == 0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method = "spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method = "spearman")$p.value)
    
    lmfit <- lm(y ~ x, data = df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out = 100))
    line_df$y <- predict(lmfit, newdata = line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "markers",
      x = df$x,
      y = df$y,
      name = paste(cell, gene),
      text = paste0("<b>", gene, " in ", cell, "</b><br>ρ = ", round(rho, 3), "<br>p = ", signif(pval, 3)),
      hoverinfo = "text",
      marker = list(size = 7, opacity = 0.75),
      visible = FALSE
    )
    
    # Regression line
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type = "scatter",
      mode = "lines",
      x = line_df$x,
      y = line_df$y,
      line = list(width = 2),
      hoverinfo = "none",
      visible = FALSE
    )
    
    meta <- add_row(meta, trace_id = trace_index - 1,
                    Cell = cell, Gene = gene)
  }
}

# ============================================================
# ---- VISIBILITY HELPER ------------------------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel) {
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell == cell_sel & meta$Gene == gene_sel)
  if (length(combo) == 1) {
    idx <- (combo - 1) * 2 + 1
    vis[idx:(idx + 1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- DROPDOWNS: CELL + GENE ONLY ---------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c) {
  list(
    method = "update",
    args = list(list(visible = make_vis(c, genes_list[1])),
                list(title = paste("Cell:", c))),
    label = c
  )
})

buttons_gene <- lapply(genes_list, function(g) {
  list(
    method = "update",
    args = list(list(visible = make_vis(cells_list[1], g)),
                list(title = paste("Gene:", g))),
    label = g
  )
})

# ============================================================
# ---- INITIAL VISIBILITY ------------------------------------
# ============================================================

vis_init <- make_vis(cells_list[1], genes_list[1])
for (k in seq_along(all_traces)) all_traces[[k]]$visible <- vis_init[k]

# ============================================================
# ---- BUILD FIGURE ------------------------------------------
# ============================================================

fig <- plot_ly()
for (tr in all_traces) {
  fig <- fig %>% add_trace(
    type = tr$type,
    mode = tr$mode,
    x = tr$x,
    y = tr$y,
    name = tr$name,
    text = tr$text,
    hoverinfo = tr$hoverinfo,
    marker = tr$marker,
    line = tr$line,
    visible = tr$visible
  )
}

fig <- fig %>%
  layout(
    title = list(text = "polyGP Correlation Scatterplots", x = 0.05),
    xaxis = list(title = "polyGP"),
    yaxis = list(title = "Gene expression"),
    showlegend = FALSE,
    updatemenus = list(
      list(
        y = 1.15, x = 0,
        buttons = buttons_cell,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Cell State")
      ),
      list(
        y = 1.15, x = 0.35,
        buttons = buttons_gene,
        direction = "down",
        showactive = TRUE,
        xanchor = "left", yanchor = "top",
        title = list(text = "Gene")
      )
    )
  )

fig

TDP

In excitatory neurons, correlations with TDP-43 burden revealed strong and consistent associations with STMN2 expression and TDP43b. The RORB/ADGRL4 subtype showed SPP1 (-0.81, 0.0078) and CX3CR1 (-0.79, 0.01) negatively correlating with TDP43 burden. RORB/LRRK1 subtype showed one of the most pronounced effects in correlation with TDP43b, with genes NPTX2 (ρ = −0.78, p = 0,017), STMN2 (ρ = −0.8, p = 0,013), C9orf72 (ρ = −0,8, p = 0,013), SPP1(ρ = −0.73, p = 0,031) and CX3CR1 (ρ = −0.71, p = 0,031) significativly negatively correlated.

Other excitatory subtypes also showed significant negative correlations with TDP43b, including CUX2/RASGRF2 with NPTX2 (ρ = −0.7, p = 0.043) and RORB/POU3F2 with NPTX2 (ρ = −0.75 , p = 0.025) and CX3CR1 (ρ = −0.8 , p = 0.013).

Additionaly neuropathology marker STMN2 in CUX2/RORB also correlated negatively with NPTX2expression (ρ = −0.80 , p = 0.016). Together, these findings point to a robust inverse association between excitatory neuron signatures and STMN2 levels in the context of TDP-43 pathology, suggesting a central role of excitatory neuronal dysfunction in linking STMN2 downregulation with TDP-43 accumulation.

In non-neuronal populations, strong correlations with STMN2 were also observed. In TLE4/CCBE1 cells STMN2 pathology showed negative correlation, with C9orf72 (ρ = -0.76, p = 0.025), also the astrocyte GFAP - correlated negatively with C9orf72 in relation to TDP-43 burden (ρ = −0.73, p = 0.031).

These results highlight that astrocyte populations, and TLE4/CCBE1, display heterogeneous but significant associations between C9orf72 and TDP-43 burden, complementing the strong excitatory neuron correlations and underscoring the interplay between neuronal vulnerability and glial responses in FTLD-TDP.

Code
library(readxl)
library(dplyr)
library(DT)

# ============================================================
# LOAD DATA
# ============================================================

file_path <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/RESULTS_ORDERED/6. GENES OF INTERST CORRELATION/ALL_genes_correlation.xlsx"

filtered <- readxl::read_excel(file_path, sheet = 1)

# ============================================================
# CLEAN COLUMN NAMES IF NEEDED
# Some sheets have "NA." or "rho" in first column.
# Keep only meaningful columns: Cell, Gene, Spearman, p_value.
# ============================================================

# Try to detect the first unnamed column (if present)
first_col <- colnames(filtered)[1]
if (first_col %in% c("NA.", "rho", "rho1", "", NA)) {
  filtered <- filtered %>% select(-1)
}

# ============================================================
# FILTERABLE HTML TABLE FOR QUARTO
# ============================================================

datatable(
  filtered,
  filter = "top",
  options = list(
    scrollX = TRUE,
    pageLength = 25,
    dom = "frtip"   # no download buttons
  )
)

pTDP43

Code
# ============================================================
# INTERACTIVE SPEARMAN SCATTERPLOTS – ONLY TDP43b (TDP DATASET)
# ============================================================

library(dplyr)
library(tidyverse)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

clean_ids <- function(x){
  x %>%
    as.character() %>%
    trimws() %>%
    toupper() %>%
    gsub("^(X|LONG)", "", .)
}

# ============================================================
# ---- Load data ---------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/TDP/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_TDP_neuropath_SOM.csv")

OG_Covariables$X <- clean_ids(OG_Covariables$X)

# Force **only TDP43b**
covariable_to_use <- "TDP43b"

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")

# ============================================================
# ---- Compute correlations -----------------------------------
# ============================================================

results <- data.frame(Cell=character(),Gene=character(),
                      Spearman=numeric(),p_value=numeric(),
                      stringsAsFactors=FALSE)

for (cell in cells) {
  
  f <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(f)) next
  
  expr <- read.csv(f, row.names=1)
  rownames(expr) <- clean_ids(rownames(expr))
  
  for (gene in genes) {
    if (!(gene %in% colnames(expr))) next
    
    common_ids <- intersect(rownames(expr), OG_Covariables$X)
    if (length(common_ids) < 3) next
    
    gvals <- expr[common_ids, gene]
    covvals <- OG_Covariables[match(common_ids, OG_Covariables$X), covariable_to_use]
    
    rc <- safe_cor(gvals, covvals)
    
    results <- rbind(results,
                     data.frame(Cell=cell,Gene=gene,
                                Spearman=rc$estimate,
                                p_value=rc$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- Build scatterplots -------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(Cell=character(), Gene=character(), trace_id=integer())
trace_index <- 0

cov_df <- OG_Covariables[, c("X", covariable_to_use)]
cov_df$X <- clean_ids(cov_df$X)
cov_df <- cov_df[!is.na(cov_df[[covariable_to_use]]), ]

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names=1)
  rownames(expr_df) <- clean_ids(rownames(expr_df))
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), covariable_to_use],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x)==0 || sd(df$y)==0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method="spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method="spearman")$p.value)
    
    lmfit <- lm(y~x, data=df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out=100))
    line_df$y <- predict(lmfit, newdata=line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type="scatter", mode="markers",
      x=df$x, y=df$y,
      text=paste0("<b>",gene," in ",cell,"</b><br>",
                  "ρ = ",round(rho,3),"<br>",
                  "p = ",signif(pval,3)),
      hoverinfo="text",
      marker=list(size=7,opacity=0.7),
      visible=FALSE
    )
    
    # Regression
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type="scatter", mode="lines",
      x=line_df$x, y=line_df$y,
      line=list(width=2),
      visible=FALSE
    )
    
    meta <- add_row(meta, Cell=cell, Gene=gene, trace_id=trace_index-1)
  }
}

# ============================================================
# ---- Visibility helper (2-dimensional) ----------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel){
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell==cell_sel & meta$Gene==gene_sel)
  if (length(combo)==1){
    idx <- (combo-1)*2 + 1
    vis[idx:(idx+1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- Dropdowns: Cell + Gene only ----------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c){
  list(method="update",
       args=list(list(visible = make_vis(c, genes_list[1])),
                 list(title=paste("Cell:",c))),
       label=c)
})

buttons_gene <- lapply(genes_list, function(g){
  list(method="update",
       args=list(list(visible = make_vis(cells_list[1], g)),
                 list(title=paste("Gene:",g))),
       label=g)
})

# ============================================================
# ---- Initial visibility -------------------------------------
# ============================================================

initial_vis <- make_vis(cells_list[1], genes_list[1])
for(i in seq_along(all_traces)) all_traces[[i]]$visible <- initial_vis[i]

# ============================================================
# ---- Build figure -------------------------------------------
# ============================================================

fig <- plot_ly()
for(tr in all_traces){
  fig <- fig %>% add_trace(
    type=tr$type, mode=tr$mode,
    x=tr$x, y=tr$y,
    text=tr$text, hoverinfo=tr$hoverinfo,
    marker=tr$marker, line=tr$line,
    visible=tr$visible
  )
}

fig <- fig %>%
  layout(
    title=list(text="TDP43b Correlation Scatterplots", x=0.05),
    xaxis=list(title="TDP43b"),
    yaxis=list(title="Gene expression"),
    showlegend=FALSE,
    updatemenus=list(
      list(
        y=1.15, x=0.0,
        buttons=buttons_cell,
        direction="down",
        showactive=TRUE,
        title=list(text="Cell State")
      ),
      list(
        y=1.15, x=0.35,
        buttons=buttons_gene,
        direction="down",
        showactive=TRUE,
        title=list(text="Gene")
      )
    )
  )

fig

STMN2

Code
# ============================================================
# INTERACTIVE SPEARMAN SCATTERPLOTS – ONLY TDP43b (TDP DATASET)
# ============================================================

library(dplyr)
library(tidyverse)
library(plotly)

# ---- Safe Spearman correlation ----
safe_cor <- function(x, y){
  tryCatch({
    if(all(is.na(x)) | all(is.na(y))) return(list(estimate = NA, p.value = NA))
    res <- cor.test(x, y, method = "spearman")
    list(estimate = as.numeric(res$estimate), p.value = res$p.value)
  }, error = function(e) list(estimate = NA, p.value = NA))
}

clean_ids <- function(x){
  x %>%
    as.character() %>%
    trimws() %>%
    toupper() %>%
    gsub("^(X|LONG)", "", .)
}

# ============================================================
# ---- Load data ---------------------------------------------
# ============================================================

Expression_per_cell_directory <- "/media/jaumatell/datos/URI/BAYESPRISM_12_3/BAYESPRISM/FTLD/TDP/CELL STATE ORIGINAL/"
OG_Covariables <- read.csv("/media/jaumatell/datos/URI/BAYESPRISM_12_3/CORRELATION/COVARIABLES/FTD_TDP_neuropath_SOM.csv")

OG_Covariables$X <- clean_ids(OG_Covariables$X)

# Force **only TDP43b**
covariable_to_use <- "STMN2"

cells <- c("CUX2_RASGRF2","CUX2_RORB","SCN4B_NEFH","RORB_FOXO1",
           "RORB_POU3F2","RORB_ADGRL4","RORB_LRRK1","PCP4_NXPH2",
           "VAT1L_EYA4","VAT1L_THSD4","THEMIS_NR4A2","THEMIS_TMEM233",
           "TLE4_CCBE1","TLE4_MEGF11","TLE4_SEMA3D")

genes <- c("STMN2","NPTX2","C9orf72","CXCL10","SPP1","CX3CR1")

# ============================================================
# ---- Compute correlations -----------------------------------
# ============================================================

results <- data.frame(Cell=character(),Gene=character(),
                      Spearman=numeric(),p_value=numeric(),
                      stringsAsFactors=FALSE)

for (cell in cells) {
  
  f <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(f)) next
  
  expr <- read.csv(f, row.names=1)
  rownames(expr) <- clean_ids(rownames(expr))
  
  for (gene in genes) {
    if (!(gene %in% colnames(expr))) next
    
    common_ids <- intersect(rownames(expr), OG_Covariables$X)
    if (length(common_ids) < 3) next
    
    gvals <- expr[common_ids, gene]
    covvals <- OG_Covariables[match(common_ids, OG_Covariables$X), covariable_to_use]
    
    rc <- safe_cor(gvals, covvals)
    
    results <- rbind(results,
                     data.frame(Cell=cell,Gene=gene,
                                Spearman=rc$estimate,
                                p_value=rc$p.value))
  }
}

results <- results %>% drop_na(Spearman)

# ============================================================
# ---- Build scatterplots -------------------------------------
# ============================================================

all_traces <- list()
meta <- tibble(Cell=character(), Gene=character(), trace_id=integer())
trace_index <- 0

cov_df <- OG_Covariables[, c("X", covariable_to_use)]
cov_df$X <- clean_ids(cov_df$X)
cov_df <- cov_df[!is.na(cov_df[[covariable_to_use]]), ]

for (cell in unique(results$Cell)) {
  
  expr_path <- file.path(Expression_per_cell_directory, paste0(cell, ".csv"))
  if (!file.exists(expr_path)) next
  
  expr_df <- read.csv(expr_path, row.names=1)
  rownames(expr_df) <- clean_ids(rownames(expr_df))
  
  for (gene in unique(results$Gene)) {
    if (!(gene %in% colnames(expr_df))) next
    
    common_ids <- intersect(rownames(expr_df), cov_df$X)
    if (length(common_ids) < 3) next
    
    df <- tibble(
      sample = common_ids,
      x = cov_df[match(common_ids, cov_df$X), covariable_to_use],
      y = expr_df[common_ids, gene]
    ) %>% drop_na()
    
    if (nrow(df) < 3 || sd(df$x)==0 || sd(df$y)==0) next
    
    rho  <- suppressWarnings(cor(df$x, df$y, method="spearman"))
    pval <- suppressWarnings(cor.test(df$x, df$y, method="spearman")$p.value)
    
    lmfit <- lm(y~x, data=df)
    line_df <- tibble(x = seq(min(df$x), max(df$x), length.out=100))
    line_df$y <- predict(lmfit, newdata=line_df)
    
    # Scatter
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type="scatter", mode="markers",
      x=df$x, y=df$y,
      text=paste0("<b>",gene," in ",cell,"</b><br>",
                  "ρ = ",round(rho,3),"<br>",
                  "p = ",signif(pval,3)),
      hoverinfo="text",
      marker=list(size=7,opacity=0.7),
      visible=FALSE
    )
    
    # Regression
    trace_index <- trace_index + 1
    all_traces[[trace_index]] <- list(
      type="scatter", mode="lines",
      x=line_df$x, y=line_df$y,
      line=list(width=2),
      visible=FALSE
    )
    
    meta <- add_row(meta, Cell=cell, Gene=gene, trace_id=trace_index-1)
  }
}

# ============================================================
# ---- Visibility helper (2-dimensional) ----------------------
# ============================================================

make_vis <- function(cell_sel, gene_sel){
  vis <- rep(FALSE, length(all_traces))
  combo <- which(meta$Cell==cell_sel & meta$Gene==gene_sel)
  if (length(combo)==1){
    idx <- (combo-1)*2 + 1
    vis[idx:(idx+1)] <- TRUE
  }
  vis
}

# ============================================================
# ---- Dropdowns: Cell + Gene only ----------------------------
# ============================================================

cells_list <- unique(meta$Cell)
genes_list <- unique(meta$Gene)

buttons_cell <- lapply(cells_list, function(c){
  list(method="update",
       args=list(list(visible = make_vis(c, genes_list[1])),
                 list(title=paste("Cell:",c))),
       label=c)
})

buttons_gene <- lapply(genes_list, function(g){
  list(method="update",
       args=list(list(visible = make_vis(cells_list[1], g)),
                 list(title=paste("Gene:",g))),
       label=g)
})

# ============================================================
# ---- Initial visibility -------------------------------------
# ============================================================

initial_vis <- make_vis(cells_list[1], genes_list[1])
for(i in seq_along(all_traces)) all_traces[[i]]$visible <- initial_vis[i]

# ============================================================
# ---- Build figure -------------------------------------------
# ============================================================

fig <- plot_ly()
for(tr in all_traces){
  fig <- fig %>% add_trace(
    type=tr$type, mode=tr$mode,
    x=tr$x, y=tr$y,
    text=tr$text, hoverinfo=tr$hoverinfo,
    marker=tr$marker, line=tr$line,
    visible=tr$visible
  )
}

fig <- fig %>%
  layout(
    title=list(text="TDP43b Correlation Scatterplots", x=0.05),
    xaxis=list(title="TDP43b"),
    yaxis=list(title="Gene expression"),
    showlegend=FALSE,
    updatemenus=list(
      list(
        y=1.15, x=0.0,
        buttons=buttons_cell,
        direction="down",
        showactive=TRUE,
        title=list(text="Cell State")
      ),
      list(
        y=1.15, x=0.35,
        buttons=buttons_gene,
        direction="down",
        showactive=TRUE,
        title=list(text="Gene")
      )
    )
  )

fig